* Root folder of the replication package. Edit this path so it points at your local copy.
global folder "/Users/christopherrauh/Dropbox/text analysis sandbox/programs/competition_replication"

* User-written packages this script relies on (burd graph scheme, grc1leg, carryforward).
* Uncomment the lines below once to install them.
*ssc install scheme-burd, replace
*net install grc1leg
*ssc install carryforward


* Work inside the output subfolder: file names given without a path further down are resolved relative to it.
cd "$folder/output"

* Graph scheme applied to every figure drawn below.
set scheme burd


**** Descriptive figure 2
* For each of the three *_dp variables (x-axis, labelled "Months since ..."),
* plot fractional-polynomial fits of the three ons_*1 indicators, then combine
* the three panels into a single exported figure.
clear all
use "$folder/onsets"
xtset countryid ym

* Where the outcome equals 1 this month, set ons_<outcome>1 to next month's
* value of the outcome (1 or 0). Rows whose lead is missing are left untouched.
foreach outcome in anyviolence armedconf civilwar {
	foreach next in 1 0 {
		replace ons_`outcome'1 = `next' if `outcome' == 1 & F1.`outcome' == `next'
	}
}

order ons*  anyviolence armedconf civilwar *_dp

* x-axis wording for each panel, keyed by the outcome stem
local since_anyviolence since any fatality
local since_armedconf   since 50 fatalities
local since_civilwar    since 500 fatalities

** one panel per *_dp variable, restricted to values 0 through 25
graph drop _all
foreach stem in anyviolence armedconf civilwar {
	local dp `stem'_dp
	twoway ///
		(fpfit ons_anyviolence1 `dp' if inrange(`dp', 0, 25), lwidth(medthick) lcolor(black)) ///
		(fpfit ons_armedconf1 `dp' if inrange(`dp', 0, 25), lwidth(medthick) lcolor(red) lpattern(dash)) ///
		(fpfit ons_civilwar1 `dp' if inrange(`dp', 0, 25), lwidth(thick) lcolor(blue) lpattern(dot)), ///
		xtitle("Months `since_`stem''") ytitle("Probability of passing threshold") ///
		legend(order(1 "1 fatality" 2 "50 fatalities"  3 "500 fatalities" ) pos(6) row(1)) ///
		plotregion(fcolor(white)) graphregion(fcolor(white)) ///
		xscale(r(0 20)) xlabel(0 5 10 15 20 25) ylabel(0(0.2)1) name(`dp')
}

graph combine anyviolence_dp armedconf_dp civilwar_dp, xsize(6) row(1)
graph export "$folder/figures/figure2.pdf", replace



********* Read predictions
* Forecast files are named
*   <model>_<inputs>_topics<T>_m<month>_y<year>_forward<horizon>_type<type>.csv
* Pass 1 converts every CSV that exists into a .dta of the same name.
* Pass 2 appends those .dta files, deletes them, and collapses the stack to one
* row per isocode-year-month.

local ntopics 15

* ---- Pass 1: CSV to DTA ----
foreach model in rf {
	foreach inputs in both text history {
		forvalues type = 3/3 {
			forvalues yr = 2013/2020 {
				forvalues mo = 1/12 {
					forvalues ahead = 2/7 {
						local stem `model'_`inputs'_topics`ntopics'_m`mo'_y`yr'_forward`ahead'_type`type'
						clear
						capture confirm file `stem'.csv
						if _rc != 0 {
							* no forecast file for this combination
							continue
						}
						insheet using `stem'.csv
						* v1 is discarded, v2 holds the forecast, v3 the country code
						drop v1
						rename v2 `model'_`inputs'`ahead'_`type'
						rename v3 isocode
						gen year = `yr'
						gen month = `mo'
						* the first data row is discarded
						drop if _n == 1
						sort isocode year month
						save `stem', replace
					}
				}
			}
		}
	}
}

* ---- Pass 2: stack the converted files and remove them from disk ----
clear
foreach model in rf {
	foreach inputs in both text history {
		forvalues type = 3/3 {
			forvalues yr = 2013/2020 {
				forvalues mo = 1/12 {
					forvalues ahead = 2/7 {
						local stem `model'_`inputs'_topics`ntopics'_m`mo'_y`yr'_forward`ahead'_type`type'
						capture confirm file `stem'.dta
						if _rc != 0 {
							continue
						}
						append using `stem'
						erase `stem'.dta
					}
				}
			}
		}
	}
}

* Each appended file fills a single forecast column, so taking the max within
* isocode-year-month folds the stacked rows into one row holding all columns.
collapse (max) *both* *text* *history*, by(isocode year month )


* Attach ids and outcome variables from the master file to the stacked
* forecasts. Master-file rows that have no forecast are not kept.
sort isocode year month
merge 1:1 isocode year month using "$folder/masterfile_competition", ///
	keepusing(month_id country_id best goal* hachris* hannes* gold* ziel*) ///
	keep(master match) nogenerate

* Remove the columns that are not carried into the_predictions.
* This has to happen BEFORE no_text is created: the *text* wildcard also matches
* the name no_text, so running it afterwards (as this section previously did)
* silently deleted the fallback flag right after it had been computed.
drop goal* hannes* hachris*
drop *text*

* rf_pred# is the forecast built from both inputs; where that is missing the
* history-only forecast is used instead. no_text = 1 marks rows where this
* fallback was needed for at least one horizon.
gen no_text = 0
foreach h in rf {
	local fmax 7
	forval f = 2/`fmax' {

		gen `h'_pred`f' = `h'_both`f'_3

		replace no_text = 1 if  `h'_pred`f' == . & no_text == 0
		replace `h'_pred`f' = `h'_history`f'_3 if `h'_pred`f' == .

		* strip the trailing _3 from the remaining forecast columns
		foreach j in both history {
			rename `h'_`j'`f'_3 `h'_`j'`f'
		}

	}
}

save the_predictions, replace


***** Create final output: forecasts issued in 2020
* Writes Mueller_Rauh_predictions_2020.dta (wide, one rf_pred# column per
* horizon) and Mueller_Rauh_predictions_2020.csv (long, month_id shifted to the
* month being forecast, one yourmodel_s# column per horizon).

clear all
use the_predictions
keep if year == 2020 & country_id != .

* express each forecast relative to ln(best + 1)
forvalues s = 2/7 {
	replace rf_pred`s' = rf_pred`s' - ln(best + 1)
}

keep month_id country_id rf_pred*
sort country_id
save Mueller_Rauh_predictions_2020, replace

* long format: one row per country and horizon (months = 2,...,7)
reshape long rf_pred, i(country_id) j(months)
replace month_id = month_id + months

* each yourmodel_s# column is filled only on the rows of its own horizon
forvalues s = 2/7 {
	gen yourmodel_s`s' = rf_pred if months == `s'
}
drop rf_pred months
outsheet using Mueller_Rauh_predictions_2020.csv, comma replace





* Final output: forecasts issued in every year other than 2020.
* Writes Mueller_Rauh_predictions_earlier.dta (wide, one rf_pred# column per
* horizon) and Mueller_Rauh_predictions_earlier.csv (one row per country and
* forecast month, one yourmodel_s# column per horizon).
clear all
use the_predictions
keep if year!= 2020

* express each forecast relative to ln(best + 1)
local fmax 7
forval f = 2/`fmax' {
	replace rf_pred`f' = rf_pred`f' - log(best + 1)
}

* The keep/drop/sort sequence used to be repeated a second time, ending in a
* plain sort on country_id that threw away the month ordering just before the
* save. It now runs once, so the saved file is ordered by country and month.
keep month_id country_id rf_pred*
drop if country_id == .
sort country_id month_id
save Mueller_Rauh_predictions_earlier, replace

* long format: one row per country, origin month and horizon (months = 2,...,7)
reshape long rf_pred, i(country_id month_id) j(months)
* shift month_id from the month the forecast was made to the month it refers to
replace month_id = month_id + months
local fmax 7
forval f = 2/`fmax' {
	gen yourmodel_s`f' = rf_pred if months == `f'
}
drop rf_pred months

* After the shift several origin months point at the same forecast month, each
* filling a different yourmodel_s# column. Averaging within country and month
* folds them into a single row.
collapse yourmodel*, by(country_id month_id)

outsheet * using Mueller_Rauh_predictions_earlier.csv, comma replace





******* Figure 8
* Mean squared error by country: forecast with text (rf_pred#) on the y-axis
* against the history-only forecast (rf_history#) on the x-axis, one panel per
* horizon, with a 45-degree reference line.
clear all
use the_predictions

* express both forecasts relative to ln(best + 1)
forvalues s = 2/7 {
	foreach model in history pred {
		replace rf_`model'`s' = rf_`model'`s' - ln(best + 1)
	}
}

keep isocode year month month_id country_id rf_pred*  rf_history*
drop if country_id == .
sort country_id month_id
merge 1:1 country_id month_id using "$folder/masterfile_competition", keepusing( best goal*  ziel*)

* squared error against the ziel# variables
forvalues s = 2/7 {
	foreach model in history pred {
		gen mse_`model'`s' = (rf_`model'`s' - ziel`s')^2
	}
}

* country averages; rows without a country code are left out
collapse mse_* if isocode != "", by(isocode)

graph drop _all
local panels
forvalues s = 2/7 {
	* sorting on the x variable makes the reference line draw cleanly
	sort mse_history`s'
	twoway (scatter mse_pred`s' mse_history`s') ///
		(line mse_history`s' mse_history`s'), ///
		legend(off) xtitle("Without text") ytitle("With text") ///
		subtitle("`s' periods ahead") name(fig`s')
	local panels `panels' fig`s'
}
graph combine `panels'
graph export "$folder/figures/figure8.pdf", replace


** Figure 7: mean squared error over time
* Same squared errors as in figure 8, but averaged by calendar month and drawn
* as time series, one panel per horizon, with a shared legend.

clear all
use the_predictions

* express both forecasts relative to ln(best + 1)
forvalues s = 2/7 {
	foreach model in history pred {
		replace rf_`model'`s' = rf_`model'`s' - ln(best + 1)
	}
}

keep isocode year month month_id country_id rf_pred*  rf_history*
drop if country_id == .
sort country_id month_id
merge 1:1 country_id month_id using "$folder/masterfile_competition", keepusing( best goal*  ziel*)

* squared error against the ziel# variables
forvalues s = 2/7 {
	foreach model in history pred {
		gen mse_`model'`s' = (rf_`model'`s' - ziel`s')^2
	}
}

* monthly averages; rows without a year are left out
collapse mse_* if year != ., by(year month)

* scratch copy that is reloaded for every panel and erased afterwards
save uch, replace

graph drop _all
local panels
forvalues s = 2/7 {
	use uch, clear

	* date the error by the month being forecast (origin month + horizon)
	gen ym = ym(year, month) + `s'

	* monthly dates 648 to 719 correspond to 2014m1 through 2019m12
	keep if inrange(ym, 648, 719)
	format ym %tm

	twoway (line mse_pred`s' mse_history`s' ym, ///
			lcolor(black gray) lpattern(dash solid) lwidth(medthick medthick)), ///
		legend(order(1 "With text" 2 "Without text")) name(fig`s') ///
		subtitle("`s' periods ahead") xtitle("") ytitle("")
	local panels `panels' fig`s'
}
erase uch.dta
grc1leg  `panels',  legendfrom(fig2)
graph export "$folder/figures/figure7.pdf", replace


*******



*******
* Build comparison_file.dta: our forecasts (all years) merged with the
* competition covariates and the benchmark file, plus the gold# targets.

* Stack the pre-2020 and 2020 forecast files into one dataset.
clear 
use Mueller_Rauh_predictions_earlier
append using Mueller_Rauh_predictions_2020
drop if country_id == .
save ourdata.dta, replace

* Convert the benchmark CSV to .dta so it can be merged further down.
import delimited "$folder/benchmark_cm.csv", clear 
save "$folder/benchmark_cm.dta", replace

* Load the competition data and keep only the listed columns.
import delimited "$folder/mergeforcompetition.csv", clear 


keep country_id month_id isocode year month tokens best ste_theta0- ste_theta14 ln_best_f2-on_armedconf_f7 ln_pop childmortality anyviolence armedconf civilwar anyviolence_dp armedconf_dp civilwar_dp ged_best_ns ged_best_os

* Scratch file: written here, merged below, then erased.
save temp.dta, replace



* Forecasts are the master data; the competition columns are merged onto them.
use ourdata.dta, clear
merge 1:1 country_id month_id  using temp.dta
drop _merge

erase temp.dta

* Numeric country identifier built from isocode, and a monthly date.
egen countryid=group(isocode)
generate ym=ym(year, month)

* NOTE(review): rows with a missing year also satisfy year>1969 and are kept here - confirm intended.
keep if year>1969
* Fill gaps in ln_pop within country: first carrying values forward in time ...
gsort isocode ym
by isocode: carryforward ln_pop, replace

* ... then in reverse time order, which fills gaps at the start of each series.
gsort isocode - ym
by isocode: carryforward ln_pop, replace

*fill in pop data
* Hard-coded ln_pop values; each line sets ln_pop on every row of that isocode.
replace ln_pop=4.3438054 if isocode =="AND" 
replace ln_pop=6.3 if isocode =="FSM"
replace ln_pop= 3.6375862 if isocode =="LIE"
replace ln_pop= 3.6 if isocode =="MCO"
replace ln_pop= 2.3978953  if isocode =="NRU"
replace ln_pop=  3.3 if isocode =="SMR"
replace ln_pop= 2.3978953 if isocode =="TUV"
replace ln_pop= 7.5336937 if isocode =="XKX"

* Fill missing country_id within isocode, forward and then in reverse time order.
gsort isocode ym
by isocode: carryforward country_id, replace

gsort isocode - ym
by isocode: carryforward country_id, replace

* Fill missing month_id with the mean month_id of the rows sharing the same ym.
bysort ym: egen month_mean=mean(month_id)
replace month_id=month_mean if month_id==. & month_mean!=.

* Rows that still lack either merge key cannot be matched and are removed.
drop if month_id==.
drop if country_id==.

* Add the benchmark columns. _merge from this merge stays in the saved file.
merge 1:1 country_id month_id using "$folder/benchmark_cm.dta"




* gold# = ln_best_f# minus ln(best+1), for horizons 2 to 7.
forvalues f=2/7 {
	generate gold`f'=ln_best_f`f'-ln(best+1)
}






save comparison_file.dta, replace


* Country case-study figures (figure 6: horizon 2; figures 9 and 10: horizon 7).
* Each plot overlays the gold# series and the rf_pred# forecast for one country
* from 2010 onwards, using the data left in memory by the previous section.
*
* Every spec below reads:
*   isocode  title  output-file-stem  horizon-plotted  horizon-of-sample-filter
* The sample filter keeps only rows where rf_pred<filter> is non-missing.
* NOTE(review): the filter horizon is 2 for Egypt and Venezuela but 6 for the
* other countries, exactly as in the original calls - confirm this is intended.

local shared_opts ytitle("Violence change") legend( order(2 "Model forecast" 1 "Data") size(small)) xtitle("") plotregion(fcolor(white)) graphregion(fcolor(white))

foreach spec in ///
	"EGY Egypt figure6_egypt 2 2" ///
	"CMR Cameroon figure6_cameroon 2 6" ///
	"MOZ Mozambique figure6_mozambique 2 6" ///
	"LBY Libya figure6_libya 2 6" ///
	"VEN Venezuela figure9_venezuela 7 2" ///
	"BLR Belarus figure10_belarus 7 6" {

	tokenize `spec'
	local iso     `1'
	local title   `2'
	local outfile `3'
	local hz      `4'
	local filt    `5'

	twoway (line gold`hz' ym if isocode=="`iso'" & year>2009) ///
		(line rf_pred`hz' ym if isocode=="`iso'" & year>2009, lwidth(thick) lpattern(dash)) ///
		if rf_pred`filt'!=., ///
		subtitle("`title'", color(black)) `shared_opts'
	graph export "$folder/figures/`outfile'.pdf", replace
}


**** Tables
* Summary statistics of squared forecast errors for two evaluation windows:
* years 2014 to 2016 and years 2017 to 2019.

use comparison_file.dta, clear

* Target (goal#) and squared error of rf_pred# for each horizon 2 to 7
local se_vars
forvalues s = 2/7 {
	generate goal`s' = ln_best_f`s' - ln(best+1)
	quietly generate squareerror_both`s' = (goal`s' - rf_pred`s')^2
	local se_vars `se_vars' squareerror_both`s'
}

* evaluation windows
local early year>2013 & year<2017
local late  year>2016 & year<2020

**** Table 1: all horizons, rows with a non-missing horizon-2 squared error
foreach window in early late {
	summarize `se_vars' if squareerror_both2!=. & ``window''
}

**** Table 2: horizon 2 only, for increasingly narrow samples
tsset country_id month_id

* best is zero this month
local calm   best==0
* ... and best is positive two months ahead
local onset  `calm' & F2.best>0 & F2.best!=.
* ... and best was also zero in each of the three preceding months
local newons `onset' & L1.best==0 & L2.best==0 & L3.best==0

foreach sample in calm onset newons {
	foreach window in early late {
		summarize squareerror_both2 if squareerror_both2!=. & ``window'' & ``sample''
	}
}
